library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.6 ✔ purrr 0.3.4
✔ tibble 3.1.7 ✔ dplyr 1.0.9
✔ tidyr 1.2.0 ✔ stringr 1.4.0
✔ readr 2.1.2 ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(janitor)
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
library(skimr)
library(tsibble)
Attaching package: ‘tsibble’
The following objects are masked from ‘package:base’:
intersect, setdiff, union
library(lubridate)
Attaching package: ‘lubridate’
The following object is masked from ‘package:tsibble’:
interval
The following objects are masked from ‘package:base’:
date, intersect, setdiff, union
beds <- read_csv("raw_data/non_covid_raw_data/beds_by_nhs_board_of_treatment_and_specialty.csv") %>%
clean_names()
Warning: One or more parsing issues, see `problems()` for details
Rows: 30458 Columns: 20
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (10): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, Specialty, SpecialtyQF, SpecialtyName, SpecialtyNameQF
dbl (5): AllStaffedBeddays, TotalOccupiedBeddays, AverageAvailableStaffedBeds, AverageOccupiedBeds, PercentageOccupancy
lgl (5): AllStaffedBeddaysQF, TotalOccupiedBeddaysQF, AverageAvailableStaffedBedsQF, AverageOccupiedBedsQF, PercentageOccupancyQF
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(beds)
skim(beds)
view(beds)
beds %>%
arrange(desc(all_staffed_beddays))
beds %>%
distinct(location)
beds %>%
distinct(hb)
beds %>%
distinct(all_staffed_beddays)
beds %>%
distinct(total_occupied_beddays)
beds %>%
ggplot(aes(x = all_staffed_beddays)) +
geom_histogram(col = "white")+
scale_x_log10()
beds %>%
ggplot(aes(x = total_occupied_beddays)) +
geom_histogram(col = "white")+
scale_x_log10()
beds %>%
ggplot(aes(x = average_occupied_beds)) +
geom_histogram(col = "white")+
scale_x_log10()
beds %>%
ggplot(aes(x = percentage_occupancy)) +
geom_histogram(col = "white")
beds_select <- beds %>%
mutate(date = yq(quarter),
year = year(date),
month = month(date, label = TRUE, abbr = FALSE),
season = case_when(
str_detect(month, "January") ~ "Winter",
str_detect(month, "April") ~ "Spring",
str_detect(month, "July") ~ "Summer",
str_detect(month, "October") ~ "Autumn"),
season = factor(season, order = TRUE)) %>%
select(quarter, hb, location, specialty_name, all_staffed_beddays, total_occupied_beddays, average_available_staffed_beds, average_occupied_beds, percentage_occupancy, date, year, month, season)
beds_select %>%
filter(!is.na(percentage_occupancy)) %>%
group_by(quarter) %>%
summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>%
ggplot(aes(x = quarter,
y = mean_percentage_occupancy)) +
geom_point() +
geom_line(group = 1)

beds_select %>%
filter(!is.na(all_staffed_beddays)) %>%
group_by(quarter) %>%
summarise(mean_staffed_beddays = mean(all_staffed_beddays)) %>%
ggplot(aes(x = quarter,
y = mean_staffed_beddays)) +
geom_point() +
geom_line(group = 1)

beds_select %>%
filter(!is.na(average_available_staffed_beds)) %>%
group_by(quarter) %>%
summarise(mean_avg_staffed_beddays = mean(average_available_staffed_beds)) %>%
ggplot(aes(x = quarter,
y = mean_avg_staffed_beddays)) +
geom_point() +
geom_line(group = 1)

beds_select %>%
filter(!is.na(average_available_staffed_beds)) %>%
mutate(empty_beddays = all_staffed_beddays - total_occupied_beddays) %>%
group_by(quarter) %>%
summarise(mean_empty = mean(empty_beddays)) %>%
ggplot(aes(x = quarter,
y= mean_empty)) +
geom_point() +
geom_line(group = 1)

beds_select %>%
filter(!is.na(percentage_occupancy)) %>%
group_by(quarter, hb) %>%
summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>%
ggplot(aes(x = quarter,
y = mean_percentage_occupancy)) +
geom_point() +
geom_line(aes(group = hb, colour = hb))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

`
beds_select %>%
filter(!is.na(average_occupied_beds)) %>%
group_by(quarter) %>%
summarise(mean_average_occupied_beds = mean(average_occupied_beds)) %>%
ggplot(aes(x = quarter,
y = mean_average_occupied_beds)) +
geom_point() +
geom_line(group = 1)

beds_select %>%
group_by(year) %>%
summarise(mean_year_bed = mean(average_occupied_beds)) %>%
ggplot(aes(x = year,
y = mean_year_bed)) +
geom_line()

beds_select %>%
group_by(season) %>%
summarise(mean_year_bed = mean(average_occupied_beds)) %>%
ggplot(aes(x = season,
y = mean_year_bed)) +
geom_col() +
scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))

beds_select %>%
filter(!is.na(percentage_occupancy)) %>%
group_by(season) %>%
summarise(mean_prct_beds = mean(percentage_occupancy)) %>%
ggplot(aes(x = season,
y = mean_prct_beds)) +
geom_col() +
scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))

beds_select %>%
filter(!is.na(total_occupied_beddays)) %>%
group_by(season) %>%
summarise(mean_occupied_beddays = mean(total_occupied_beddays)) %>%
ggplot(aes(x = season,
y = mean_occupied_beddays)) +
geom_col() +
scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))

beds_select %>%
filter(quarter == "2016Q4")
beds_select %>%
ggplot(aes(x = season,
y = percentage_occupancy)) +
geom_col() +
facet_wrap(~ specialty_name)
beds_select %>%
filter(!is.na(percentage_occupancy)) %>%
group_by(specialty_name, month) %>%
summarise(mean_pct_speciality = mean(percentage_occupancy)) %>%
ggplot(aes(x = month,
y = mean_pct_speciality)) +
geom_point() +
geom_line(aes(group = specialty_name, colour = specialty_name))+
theme(legend.position = "none")
`summarise()` has grouped output by 'specialty_name'. You can override using the `.groups` argument.

beds_select %>%
filter(percentage_occupancy == 100,
year == 2017) %>%
group_by(season, hb) %>%
summarise(count = n()) %>%
ggplot(aes(x = season,
y = count)) +
geom_col(aes(fill = hb), position = "dodge")
`summarise()` has grouped output by 'season'. You can override using the `.groups` argument.

beds_select %>%
mutate(bins =
case_when(percentage_occupancy < 25 ~ "<25",
percentage_occupancy < 50 ~ "25-50",
percentage_occupancy < 75 ~ "50-75",
percentage_occupancy > 75 ~ ">75"
)
) %>%
filter(bins == "<25") %>%
group_by(season) %>%
summarise(count = n())
simd_treatment %>%
filter(!is.na(average_length_of_stay)) %>%
group_by(quarter) %>%
summarise(mean_avg_stay = mean(average_length_of_stay)) %>%
ggplot(aes(x = quarter,
y = mean_avg_stay)) +
geom_point() +
geom_line(group = 1)

simd_treatment %>%
filter(!is.na(average_length_of_stay)) %>%
group_by(quarter, admission_type) %>%
summarise(mean_avg_stay = mean(average_length_of_stay)) %>%
ggplot(aes(x = quarter,
y = mean_avg_stay)) +
geom_point() +
geom_line(aes(group = admission_type, colour = admission_type))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>%
filter(!is.na(average_length_of_stay),
simd == 5) %>%
mutate(simd = replace_na(simd, 0)) %>%
group_by(quarter, simd) %>%
summarise(mean_avg_stay = mean(average_length_of_stay)) %>%
ggplot(aes(x = quarter,
y = mean_avg_stay)) +
geom_point() +
geom_line(aes(group = simd, colour = simd))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>%
filter(!is.na(average_length_of_stay),
simd == 1) %>%
mutate(simd = replace_na(simd, 0)) %>%
group_by(quarter, simd) %>%
summarise(mean_avg_stay = mean(average_length_of_stay)) %>%
ggplot(aes(x = quarter,
y = mean_avg_stay)) +
geom_point() +
geom_line(aes(group = simd, colour = simd))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>%
filter(!is.na(average_length_of_episode)) %>%
group_by(quarter, admission_type) %>%
summarise(mean_avg_epidsode = mean(average_length_of_episode)) %>%
ggplot(aes(x = quarter,
y = mean_avg_epidsode)) +
geom_point() +
geom_line(aes(group = admission_type, colour = admission_type))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>%
filter(!is.na(average_length_of_episode),
simd == 5) %>%
group_by(quarter) %>%
summarise(mean_avg_episode = mean(average_length_of_episode)) %>%
ggplot(aes(x = quarter,
y = mean_avg_episode)) +
geom_point() +
geom_line(group = 1)

simd_treatment %>%
filter(!is.na(average_length_of_episode),
simd == 1) %>%
group_by(quarter) %>%
summarise(mean_avg_episode = mean(average_length_of_episode)) %>%
ggplot(aes(x = quarter,
y = mean_avg_episode)) +
geom_point() +
geom_line(group = 1)

---
title: "R Notebook"
output: html_notebook
---

```{r}
library(tidyverse)
library(janitor)
library(skimr)
library(tsibble)
library(lubridate)
```


```{r}
# Load the bed-occupancy extract and convert the column names to snake_case.
# Left to guess, readr types the five statistic qualifier (*QF) columns as
# logical and the load reports parsing issues; reading them as text keeps
# whatever flag values they hold.
# NOTE(review): presumably those flags are what triggers the parsing warning -
# confirm with problems(beds) after loading.
beds <- read_csv(
  "raw_data/non_covid_raw_data/beds_by_nhs_board_of_treatment_and_specialty.csv",
  col_types = cols(
    AllStaffedBeddaysQF = col_character(),
    TotalOccupiedBeddaysQF = col_character(),
    AverageAvailableStaffedBedsQF = col_character(),
    AverageOccupiedBedsQF = col_character(),
    PercentageOccupancyQF = col_character()
  )
) %>%
  clean_names()
```


```{r}
# Column names, types and leading values.
beds %>%
  glimpse()
```


```{r}
# skimr summary of every column.
skim(beds)
```


```{r}
# Open the full table in the data viewer.
view(beds)
```


```{r}
# Rows ordered from the largest all_staffed_beddays value downwards.
arrange(beds, desc(all_staffed_beddays))
```


```{r}
# Unique values present in four columns of the raw table, one listing each.
distinct(beds, location)

distinct(beds, hb)

distinct(beds, all_staffed_beddays)

distinct(beds, total_occupied_beddays)
```


```{r}
# Histogram of all_staffed_beddays on a log10 x axis.
ggplot(beds, aes(x = all_staffed_beddays)) +
  geom_histogram(colour = "white") +
  scale_x_log10()
```


```{r}
# Histogram of total_occupied_beddays on a log10 x axis.
ggplot(beds, aes(x = total_occupied_beddays)) +
  geom_histogram(colour = "white") +
  scale_x_log10()
```


```{r}
# Histogram of average_occupied_beds on a log10 x axis.
ggplot(beds, aes(x = average_occupied_beds)) +
  geom_histogram(colour = "white") +
  scale_x_log10()
```


```{r}
# Histogram of percentage_occupancy on the untransformed scale.
ggplot(beds, aes(x = percentage_occupancy)) +
  geom_histogram(colour = "white")
```


```{r}
# Derive date, year, month and season from the quarter label, then keep only
# the measure columns and the derived calendar fields.
beds_select <- beds %>%
  mutate(
    date = yq(quarter),
    year = year(date),
    month = month(date, label = TRUE, abbr = FALSE),
    # Season is keyed on the month number rather than the month label: the
    # labels follow the session locale, so matching on "January" etc. would
    # silently give NA seasons outside an English locale.
    season = case_when(
      lubridate::month(date) == 1 ~ "Winter",
      lubridate::month(date) == 4 ~ "Spring",
      lubridate::month(date) == 7 ~ "Summer",
      lubridate::month(date) == 10 ~ "Autumn"
    ),
    # `ordered` is spelled out (the original `order = TRUE` only worked through
    # partial argument matching) and the levels are explicit, so the ordering
    # is the one the seasonal plots use instead of alphabetical.
    season = factor(
      season,
      levels = c("Spring", "Summer", "Autumn", "Winter"),
      ordered = TRUE
    )
  ) %>%
  select(
    quarter, hb, location, specialty_name,
    all_staffed_beddays, total_occupied_beddays,
    average_available_staffed_beds, average_occupied_beds,
    percentage_occupancy,
    date, year, month, season
  )
```


```{r}
# Mean percentage_occupancy per quarter; rows with no value are left out.
beds_select %>%
  drop_na(percentage_occupancy) %>%
  group_by(quarter) %>%
  summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>%
  ggplot(aes(quarter, mean_percentage_occupancy)) +
  geom_point() +
  # a constant group joins the points into a single line
  geom_line(group = 1)
```


```{r}
# Mean total_occupied_beddays per quarter; rows with no value are left out.
beds_select %>%
  drop_na(total_occupied_beddays) %>%
  group_by(quarter) %>%
  summarise(mean_occupied_beddays = mean(total_occupied_beddays)) %>%
  ggplot(aes(quarter, mean_occupied_beddays)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
# Mean all_staffed_beddays per quarter; rows with no value are left out.
beds_select %>%
  drop_na(all_staffed_beddays) %>%
  group_by(quarter) %>%
  summarise(mean_staffed_beddays = mean(all_staffed_beddays)) %>%
  ggplot(aes(quarter, mean_staffed_beddays)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
# Mean average_available_staffed_beds per quarter; rows with no value are
# left out.
beds_select %>%
  drop_na(average_available_staffed_beds) %>%
  group_by(quarter) %>%
  summarise(
    mean_avg_staffed_beddays = mean(average_available_staffed_beds)
  ) %>%
  ggplot(aes(quarter, mean_avg_staffed_beddays)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
# Mean staffed-but-unoccupied bed-days per quarter.
# Missing values are dropped on the two columns the subtraction reads. The
# original filtered on average_available_staffed_beds, which the calculation
# never uses, so a missing input could still turn a quarter's mean into NA.
beds_select %>%
  drop_na(all_staffed_beddays, total_occupied_beddays) %>%
  mutate(empty_beddays = all_staffed_beddays - total_occupied_beddays) %>%
  group_by(quarter) %>%
  summarise(mean_empty = mean(empty_beddays)) %>%
  ggplot(aes(x = quarter, y = mean_empty)) +
  geom_point() +
  # a constant group joins the points into a single line
  geom_line(group = 1)
```



```{r}
# Mean percentage_occupancy per quarter, with one coloured line per hb.
beds_select %>%
  filter(!is.na(percentage_occupancy)) %>%
  group_by(quarter, hb) %>%
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(
    mean_percentage_occupancy = mean(percentage_occupancy),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = quarter, y = mean_percentage_occupancy)) +
  geom_point() +
  geom_line(aes(group = hb, colour = hb))
```


Mean of the average occupied beds per quarter.
```{r}
# Mean average_occupied_beds per quarter; rows with no value are left out.
beds_select %>%
  drop_na(average_occupied_beds) %>%
  group_by(quarter) %>%
  summarise(mean_average_occupied_beds = mean(average_occupied_beds)) %>%
  ggplot(aes(quarter, mean_average_occupied_beds)) +
  geom_point() +
  # a constant group joins the points into a single line
  geom_line(group = 1)
```



```{r}
# Mean average_occupied_beds per year.
# na.rm = TRUE brings this in line with the quarterly plot of the same
# column, which drops missing values first; without it a single NA makes the
# whole year's mean NA and leaves a gap in the line.
beds_select %>%
  group_by(year) %>%
  summarise(mean_year_bed = mean(average_occupied_beds, na.rm = TRUE)) %>%
  ggplot(aes(x = year, y = mean_year_bed)) +
  geom_line()
```


```{r}
# Mean average_occupied_beds per season.
# na.rm = TRUE keeps a single missing value from turning a season's mean into
# NA, matching the other seasonal charts, which drop missing values first.
beds_select %>%
  group_by(season) %>%
  summarise(mean_year_bed = mean(average_occupied_beds, na.rm = TRUE)) %>%
  ggplot(aes(x = season, y = mean_year_bed)) +
  geom_col() +
  # fix the bar order explicitly
  scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))
```



```{r}
# Mean percentage_occupancy per season; rows with no value are left out.
beds_select %>%
  drop_na(percentage_occupancy) %>%
  group_by(season) %>%
  summarise(mean_prct_beds = mean(percentage_occupancy)) %>%
  ggplot(aes(season, mean_prct_beds)) +
  geom_col() +
  # fix the bar order explicitly
  scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))
```


```{r}
# Mean total_occupied_beddays per season; rows with no value are left out.
beds_select %>%
  drop_na(total_occupied_beddays) %>%
  group_by(season) %>%
  summarise(mean_occupied_beddays = mean(total_occupied_beddays)) %>%
  ggplot(aes(season, mean_occupied_beddays)) +
  geom_col() +
  scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))
```


```{r}
# Show the rows belonging to the 2016Q4 quarter.
filter(beds_select, quarter == "2016Q4")
```


```{r}
# Mean percentage_occupancy per season, one panel per specialty.
# geom_col() stacks every row it is given, so plotting the raw rows drew the
# sum of all percentages in each season rather than an occupancy level.
# Summarising to one mean per specialty and season gives each bar a value on
# the percentage scale.
beds_select %>%
  filter(!is.na(percentage_occupancy)) %>%
  group_by(specialty_name, season) %>%
  summarise(
    mean_percentage_occupancy = mean(percentage_occupancy),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = season, y = mean_percentage_occupancy)) +
  geom_col() +
  facet_wrap(~ specialty_name)
```


```{r}
# Mean percentage_occupancy per month, with one coloured line per specialty.
beds_select %>%
  filter(!is.na(percentage_occupancy)) %>%
  group_by(specialty_name, month) %>%
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(
    mean_pct_speciality = mean(percentage_occupancy),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = month, y = mean_pct_speciality)) +
  geom_point() +
  geom_line(aes(group = specialty_name, colour = specialty_name)) +
  # the legend is hidden
  theme(legend.position = "none")
```



```{r}
# Number of 2017 rows reporting 100% occupancy, per season and hb.
beds_select %>%
  filter(
    percentage_occupancy == 100,
    year == 2017
  ) %>%
  # count() replaces group_by() + summarise(n()): same numbers, but the result
  # is ungrouped and no grouping message is emitted.
  count(season, hb, name = "count") %>%
  ggplot(aes(x = season, y = count)) +
  geom_col(aes(fill = hb), position = "dodge")

```


```{r}
# Bin percentage_occupancy into quarters of the scale and count, per season,
# the rows that fall in the lowest bin.
beds_select %>%
  mutate(
    bins = case_when(
      percentage_occupancy < 25 ~ "<25",
      percentage_occupancy < 50 ~ "25-50",
      percentage_occupancy < 75 ~ "50-75",
      # >= rather than >: a value of exactly 75 previously matched no
      # condition and was binned as NA.
      percentage_occupancy >= 75 ~ ">=75"
    )
  ) %>%
  filter(bins == "<25") %>%
  count(season, name = "count")
```
## Inpatient and day-case activity by SIMD





```{r}
# Load the inpatient/day-case by SIMD extract and convert the column names to
# snake_case.
simd_treatment <- clean_names(
  read_csv(
    "raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv"
  )
)
```


```{r}
# Mean average_length_of_stay per quarter; rows with no value are left out.
simd_treatment %>%
  drop_na(average_length_of_stay) %>%
  group_by(quarter) %>%
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>%
  ggplot(aes(quarter, mean_avg_stay)) +
  geom_point() +
  # a constant group joins the points into a single line
  geom_line(group = 1)
```


```{r}
# Mean average_length_of_stay per quarter, one coloured line per
# admission_type.
simd_treatment %>%
  filter(!is.na(average_length_of_stay)) %>%
  group_by(quarter, admission_type) %>%
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(
    mean_avg_stay = mean(average_length_of_stay),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = quarter, y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = admission_type, colour = admission_type))
```


```{r}
# Mean average_length_of_stay per quarter for rows where simd is 5.
# filter() already discards rows whose simd is NA (the comparison yields NA),
# so the original replace_na(simd, 0) step could never change anything and
# has been removed.
simd_treatment %>%
  filter(
    !is.na(average_length_of_stay),
    simd == 5
  ) %>%
  group_by(quarter, simd) %>%
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(
    mean_avg_stay = mean(average_length_of_stay),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = quarter, y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```


```{r}
# Mean average_length_of_stay per quarter for rows where simd is 1.
# filter() already discards rows whose simd is NA (the comparison yields NA),
# so the original replace_na(simd, 0) step could never change anything and
# has been removed.
simd_treatment %>%
  filter(
    !is.na(average_length_of_stay),
    simd == 1
  ) %>%
  group_by(quarter, simd) %>%
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(
    mean_avg_stay = mean(average_length_of_stay),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = quarter, y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```



```{r}
# Mean average_length_of_stay per quarter, one line per simd value.
simd_treatment %>%
  filter(!is.na(average_length_of_stay)) %>%
  # rows with no simd value are plotted as their own series, coded 0
  mutate(simd = replace_na(simd, 0)) %>%
  group_by(quarter, simd) %>%
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(
    mean_avg_stay = mean(average_length_of_stay),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = quarter, y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```



```{r}
# Mean average_length_of_episode per quarter; rows with no value are left out.
simd_treatment %>%
  drop_na(average_length_of_episode) %>%
  group_by(quarter) %>%
  summarise(mean_avg_episode = mean(average_length_of_episode)) %>%
  ggplot(aes(quarter, mean_avg_episode)) +
  geom_point() +
  # a constant group joins the points into a single line
  geom_line(group = 1)
```


```{r}
# Mean average_length_of_episode per quarter, one coloured line per
# admission_type.
simd_treatment %>%
  filter(!is.na(average_length_of_episode)) %>%
  group_by(quarter, admission_type) %>%
  # The summary column was misspelled "mean_avg_epidsode", which also showed
  # up as the y-axis label; it now matches the other episode plots.
  # .groups = "drop" returns an ungrouped tibble and silences the
  # "summarise() has grouped output" message.
  summarise(
    mean_avg_episode = mean(average_length_of_episode),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = quarter, y = mean_avg_episode)) +
  geom_point() +
  geom_line(aes(group = admission_type, colour = admission_type))
```


```{r}
# Mean average_length_of_episode per quarter for rows where simd is 5.
simd_treatment %>%
  drop_na(average_length_of_episode) %>%
  filter(simd == 5) %>%
  group_by(quarter) %>%
  summarise(mean_avg_episode = mean(average_length_of_episode)) %>%
  ggplot(aes(quarter, mean_avg_episode)) +
  geom_point() +
  # a constant group joins the points into a single line
  geom_line(group = 1)
```


```{r}
# Mean average_length_of_episode per quarter for rows where simd is 1.
simd_treatment %>%
  drop_na(average_length_of_episode) %>%
  filter(simd == 1) %>%
  group_by(quarter) %>%
  summarise(mean_avg_episode = mean(average_length_of_episode)) %>%
  ggplot(aes(quarter, mean_avg_episode)) +
  geom_point() +
  geom_line(group = 1)
```



